import csv

import pandas as pd

from tools.df_data_utils import get_df_data


def read_csv_header(csv_file):
    """
    Return the header row (first record) of a CSV file.

    :param csv_file: path of the CSV file to read
    :return: list of column names; an empty list when the file has no rows
    """
    # 'utf-8-sig' transparently drops a leading BOM (common in Excel exports)
    # so it does not end up inside the first column name; files without a BOM
    # decode exactly as with plain 'utf-8'.
    # newline='' is what the csv module requires so that line breaks embedded
    # in quoted fields are passed through untouched.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.reader(file)
        # A default avoids leaking StopIteration to the caller on an empty file.
        return next(csv_reader, [])


def csv_to_excel(csv_file, excel_file, chunk_size=500000, float_columns=None, duplicate_columns=None):
    """
    Convert a large CSV file to an Excel file chunk by chunk.

    The CSV is read as UTF-8 with every column loaded as ``str``. Each chunk
    of ``chunk_size`` rows is written to its own worksheet, named
    ``Sheet_1``, ``Sheet_2``, ... in read order.

    :param csv_file: path of the input CSV file
    :param excel_file: path of the output Excel file (written with xlsxwriter)
    :param chunk_size: number of rows read per chunk (and written per sheet),
        defaults to 500000
    :param float_columns: column names forwarded to ``get_df_data`` as
        ``float_columns_list``; defaults to an empty list
    :param duplicate_columns: column names used as the ``subset`` for
        ``drop_duplicates``; when empty (the default) no rows are dropped.
        Duplicates are only removed within a chunk, not across chunks.
    """
    # None sentinels replace the former mutable `[]` defaults, which were
    # shared between calls. A list supplied by the caller is used as-is.
    if float_columns is None:
        float_columns = []
    if duplicate_columns is None:
        duplicate_columns = []

    # Both the writer and the chunked reader are context managers, so the
    # output workbook and the input file handle are released even when
    # processing a chunk raises.
    with pd.ExcelWriter(excel_file, engine='xlsxwriter') as writer, \
            pd.read_csv(csv_file, chunksize=chunk_size, encoding='utf-8', dtype=str) as reader:
        for i, chunk in enumerate(reader):
            # Each chunk is a fresh DataFrame owned by this loop, so it can be
            # modified directly without a defensive copy.
            chunk.fillna('', inplace=True)
            # NOTE(review): the return value is ignored, so get_df_data is
            # presumably expected to modify the chunk in place - confirm
            # against tools.df_data_utils.
            get_df_data(chunk, float_columns_list=float_columns)
            # len() rather than truthiness: also works for array-like input.
            if len(duplicate_columns) > 0:
                chunk.drop_duplicates(subset=duplicate_columns, inplace=True)
            chunk.to_excel(writer, sheet_name=f'Sheet_{i + 1}', index=False)
